{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "joint-electric",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "from tqdm import tqdm"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "quantitative-beverage",
   "metadata": {},
   "outputs": [],
   "source": [
    "train = pd.read_csv(\"train.csv\")\n",
    "test = pd.read_csv(\"test.csv\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "twelve-insulin",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>ID_code</th>\n",
       "      <th>target</th>\n",
       "      <th>var_0</th>\n",
       "      <th>var_1</th>\n",
       "      <th>var_2</th>\n",
       "      <th>var_3</th>\n",
       "      <th>var_4</th>\n",
       "      <th>var_5</th>\n",
       "      <th>var_6</th>\n",
       "      <th>var_7</th>\n",
       "      <th>...</th>\n",
       "      <th>var_190</th>\n",
       "      <th>var_191</th>\n",
       "      <th>var_192</th>\n",
       "      <th>var_193</th>\n",
       "      <th>var_194</th>\n",
       "      <th>var_195</th>\n",
       "      <th>var_196</th>\n",
       "      <th>var_197</th>\n",
       "      <th>var_198</th>\n",
       "      <th>var_199</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>train_0</td>\n",
       "      <td>0</td>\n",
       "      <td>8.9255</td>\n",
       "      <td>-6.7863</td>\n",
       "      <td>11.9081</td>\n",
       "      <td>5.0930</td>\n",
       "      <td>11.4607</td>\n",
       "      <td>-9.2834</td>\n",
       "      <td>5.1187</td>\n",
       "      <td>18.6266</td>\n",
       "      <td>...</td>\n",
       "      <td>4.4354</td>\n",
       "      <td>3.9642</td>\n",
       "      <td>3.1364</td>\n",
       "      <td>1.6910</td>\n",
       "      <td>18.5227</td>\n",
       "      <td>-2.3978</td>\n",
       "      <td>7.8784</td>\n",
       "      <td>8.5635</td>\n",
       "      <td>12.7803</td>\n",
       "      <td>-1.0914</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>train_1</td>\n",
       "      <td>0</td>\n",
       "      <td>11.5006</td>\n",
       "      <td>-4.1473</td>\n",
       "      <td>13.8588</td>\n",
       "      <td>5.3890</td>\n",
       "      <td>12.3622</td>\n",
       "      <td>7.0433</td>\n",
       "      <td>5.6208</td>\n",
       "      <td>16.5338</td>\n",
       "      <td>...</td>\n",
       "      <td>7.6421</td>\n",
       "      <td>7.7214</td>\n",
       "      <td>2.5837</td>\n",
       "      <td>10.9516</td>\n",
       "      <td>15.4305</td>\n",
       "      <td>2.0339</td>\n",
       "      <td>8.1267</td>\n",
       "      <td>8.7889</td>\n",
       "      <td>18.3560</td>\n",
       "      <td>1.9518</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>train_2</td>\n",
       "      <td>0</td>\n",
       "      <td>8.6093</td>\n",
       "      <td>-2.7457</td>\n",
       "      <td>12.0805</td>\n",
       "      <td>7.8928</td>\n",
       "      <td>10.5825</td>\n",
       "      <td>-9.0837</td>\n",
       "      <td>6.9427</td>\n",
       "      <td>14.6155</td>\n",
       "      <td>...</td>\n",
       "      <td>2.9057</td>\n",
       "      <td>9.7905</td>\n",
       "      <td>1.6704</td>\n",
       "      <td>1.6858</td>\n",
       "      <td>21.6042</td>\n",
       "      <td>3.1417</td>\n",
       "      <td>-6.5213</td>\n",
       "      <td>8.2675</td>\n",
       "      <td>14.7222</td>\n",
       "      <td>0.3965</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>train_3</td>\n",
       "      <td>0</td>\n",
       "      <td>11.0604</td>\n",
       "      <td>-2.1518</td>\n",
       "      <td>8.9522</td>\n",
       "      <td>7.1957</td>\n",
       "      <td>12.5846</td>\n",
       "      <td>-1.8361</td>\n",
       "      <td>5.8428</td>\n",
       "      <td>14.9250</td>\n",
       "      <td>...</td>\n",
       "      <td>4.4666</td>\n",
       "      <td>4.7433</td>\n",
       "      <td>0.7178</td>\n",
       "      <td>1.4214</td>\n",
       "      <td>23.0347</td>\n",
       "      <td>-1.2706</td>\n",
       "      <td>-2.9275</td>\n",
       "      <td>10.2922</td>\n",
       "      <td>17.9697</td>\n",
       "      <td>-8.9996</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>train_4</td>\n",
       "      <td>0</td>\n",
       "      <td>9.8369</td>\n",
       "      <td>-1.4834</td>\n",
       "      <td>12.8746</td>\n",
       "      <td>6.6375</td>\n",
       "      <td>12.2772</td>\n",
       "      <td>2.4486</td>\n",
       "      <td>5.9405</td>\n",
       "      <td>19.2514</td>\n",
       "      <td>...</td>\n",
       "      <td>-1.4905</td>\n",
       "      <td>9.5214</td>\n",
       "      <td>-0.1508</td>\n",
       "      <td>9.1942</td>\n",
       "      <td>13.2876</td>\n",
       "      <td>-1.5121</td>\n",
       "      <td>3.9267</td>\n",
       "      <td>9.5031</td>\n",
       "      <td>17.9974</td>\n",
       "      <td>-8.8104</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 202 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "   ID_code  target    var_0   var_1    var_2   var_3    var_4   var_5   var_6  \\\n",
       "0  train_0       0   8.9255 -6.7863  11.9081  5.0930  11.4607 -9.2834  5.1187   \n",
       "1  train_1       0  11.5006 -4.1473  13.8588  5.3890  12.3622  7.0433  5.6208   \n",
       "2  train_2       0   8.6093 -2.7457  12.0805  7.8928  10.5825 -9.0837  6.9427   \n",
       "3  train_3       0  11.0604 -2.1518   8.9522  7.1957  12.5846 -1.8361  5.8428   \n",
       "4  train_4       0   9.8369 -1.4834  12.8746  6.6375  12.2772  2.4486  5.9405   \n",
       "\n",
       "     var_7  ...  var_190  var_191  var_192  var_193  var_194  var_195  \\\n",
       "0  18.6266  ...   4.4354   3.9642   3.1364   1.6910  18.5227  -2.3978   \n",
       "1  16.5338  ...   7.6421   7.7214   2.5837  10.9516  15.4305   2.0339   \n",
       "2  14.6155  ...   2.9057   9.7905   1.6704   1.6858  21.6042   3.1417   \n",
       "3  14.9250  ...   4.4666   4.7433   0.7178   1.4214  23.0347  -1.2706   \n",
       "4  19.2514  ...  -1.4905   9.5214  -0.1508   9.1942  13.2876  -1.5121   \n",
       "\n",
       "   var_196  var_197  var_198  var_199  \n",
       "0   7.8784   8.5635  12.7803  -1.0914  \n",
       "1   8.1267   8.7889  18.3560   1.9518  \n",
       "2  -6.5213   8.2675  14.7222   0.3965  \n",
       "3  -2.9275  10.2922  17.9697  -8.9996  \n",
       "4   3.9267   9.5031  17.9974  -8.8104  \n",
       "\n",
       "[5 rows x 202 columns]"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train.head(5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "appreciated-affairs",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|████████████████████████████████████████████████████████████████████████████████| 200/200 [00:03<00:00, 59.75it/s]\n"
     ]
    }
   ],
   "source": [
    "col_names = [f\"var_{i}\" for i in range(200)]\n",
    "for col in tqdm(col_names):\n",
    "    count = test[col].value_counts()\n",
    "    uniques = count.index[count == 1]\n",
    "    test[col + \"_u\"] = test[col].isin(uniques)\n",
    "\n",
    "test[\"has_unique\"] = test[[col + \"_u\" for col in col_names]].any(axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "mighty-basics",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "100000"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "test[\"has_unique\"].sum()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "sustainable-palestinian",
   "metadata": {},
   "outputs": [],
   "source": [
    "real_test = test.loc[test[\"has_unique\"], [\"ID_code\"] + col_names]\n",
    "fake_test = test.loc[~test[\"has_unique\"], [\"ID_code\"] + col_names]\n",
    "train_and_test = pd.concat([train, real_test], axis=0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "military-tiger",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|████████████████████████████████████████████████████████████████████████████████| 200/200 [00:43<00:00,  4.64it/s]\n"
     ]
    }
   ],
   "source": [
    "for col in tqdm(col_names):\n",
    "    count = train_and_test[col].value_counts().to_dict()\n",
    "    train_and_test[col+\"_unique\"] = train_and_test[col].apply(\n",
    "        lambda x: 1 if count[x] == 1 else 0).values\n",
    "    fake_test[col+\"_unique\"] = 0 "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "extraordinary-phrase",
   "metadata": {},
   "outputs": [],
   "source": [
    "real_test = train_and_test[train_and_test[\"ID_code\"].str.contains(\"test\")].copy()\n",
    "real_test.drop([\"target\"], axis=1, inplace=True)\n",
    "train = train_and_test[train_and_test[\"ID_code\"].str.contains(\"train\")].copy()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "quantitative-iraqi",
   "metadata": {},
   "outputs": [],
   "source": [
    "test = pd.concat([real_test, fake_test], axis=0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "instant-kitty",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>ID_code</th>\n",
       "      <th>target</th>\n",
       "      <th>var_0</th>\n",
       "      <th>var_1</th>\n",
       "      <th>var_2</th>\n",
       "      <th>var_3</th>\n",
       "      <th>var_4</th>\n",
       "      <th>var_5</th>\n",
       "      <th>var_6</th>\n",
       "      <th>var_7</th>\n",
       "      <th>...</th>\n",
       "      <th>var_190_unique</th>\n",
       "      <th>var_191_unique</th>\n",
       "      <th>var_192_unique</th>\n",
       "      <th>var_193_unique</th>\n",
       "      <th>var_194_unique</th>\n",
       "      <th>var_195_unique</th>\n",
       "      <th>var_196_unique</th>\n",
       "      <th>var_197_unique</th>\n",
       "      <th>var_198_unique</th>\n",
       "      <th>var_199_unique</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>train_0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>8.9255</td>\n",
       "      <td>-6.7863</td>\n",
       "      <td>11.9081</td>\n",
       "      <td>5.0930</td>\n",
       "      <td>11.4607</td>\n",
       "      <td>-9.2834</td>\n",
       "      <td>5.1187</td>\n",
       "      <td>18.6266</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>train_1</td>\n",
       "      <td>0.0</td>\n",
       "      <td>11.5006</td>\n",
       "      <td>-4.1473</td>\n",
       "      <td>13.8588</td>\n",
       "      <td>5.3890</td>\n",
       "      <td>12.3622</td>\n",
       "      <td>7.0433</td>\n",
       "      <td>5.6208</td>\n",
       "      <td>16.5338</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>train_2</td>\n",
       "      <td>0.0</td>\n",
       "      <td>8.6093</td>\n",
       "      <td>-2.7457</td>\n",
       "      <td>12.0805</td>\n",
       "      <td>7.8928</td>\n",
       "      <td>10.5825</td>\n",
       "      <td>-9.0837</td>\n",
       "      <td>6.9427</td>\n",
       "      <td>14.6155</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>train_3</td>\n",
       "      <td>0.0</td>\n",
       "      <td>11.0604</td>\n",
       "      <td>-2.1518</td>\n",
       "      <td>8.9522</td>\n",
       "      <td>7.1957</td>\n",
       "      <td>12.5846</td>\n",
       "      <td>-1.8361</td>\n",
       "      <td>5.8428</td>\n",
       "      <td>14.9250</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>train_4</td>\n",
       "      <td>0.0</td>\n",
       "      <td>9.8369</td>\n",
       "      <td>-1.4834</td>\n",
       "      <td>12.8746</td>\n",
       "      <td>6.6375</td>\n",
       "      <td>12.2772</td>\n",
       "      <td>2.4486</td>\n",
       "      <td>5.9405</td>\n",
       "      <td>19.2514</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>199995</th>\n",
       "      <td>train_199995</td>\n",
       "      <td>0.0</td>\n",
       "      <td>11.4880</td>\n",
       "      <td>-0.4956</td>\n",
       "      <td>8.2622</td>\n",
       "      <td>3.5142</td>\n",
       "      <td>10.3404</td>\n",
       "      <td>11.6081</td>\n",
       "      <td>5.6709</td>\n",
       "      <td>15.1516</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>199996</th>\n",
       "      <td>train_199996</td>\n",
       "      <td>0.0</td>\n",
       "      <td>4.9149</td>\n",
       "      <td>-2.4484</td>\n",
       "      <td>16.7052</td>\n",
       "      <td>6.6345</td>\n",
       "      <td>8.3096</td>\n",
       "      <td>-10.5628</td>\n",
       "      <td>5.8802</td>\n",
       "      <td>21.5940</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>199997</th>\n",
       "      <td>train_199997</td>\n",
       "      <td>0.0</td>\n",
       "      <td>11.2232</td>\n",
       "      <td>-5.0518</td>\n",
       "      <td>10.5127</td>\n",
       "      <td>5.6456</td>\n",
       "      <td>9.3410</td>\n",
       "      <td>-5.4086</td>\n",
       "      <td>4.5555</td>\n",
       "      <td>21.5571</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>199998</th>\n",
       "      <td>train_199998</td>\n",
       "      <td>0.0</td>\n",
       "      <td>9.7148</td>\n",
       "      <td>-8.6098</td>\n",
       "      <td>13.6104</td>\n",
       "      <td>5.7930</td>\n",
       "      <td>12.5173</td>\n",
       "      <td>0.5339</td>\n",
       "      <td>6.0479</td>\n",
       "      <td>17.0152</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>199999</th>\n",
       "      <td>train_199999</td>\n",
       "      <td>0.0</td>\n",
       "      <td>10.8762</td>\n",
       "      <td>-5.7105</td>\n",
       "      <td>12.1183</td>\n",
       "      <td>8.0328</td>\n",
       "      <td>11.5577</td>\n",
       "      <td>0.3488</td>\n",
       "      <td>5.2839</td>\n",
       "      <td>15.2058</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>200000 rows × 402 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "             ID_code  target    var_0   var_1    var_2   var_3    var_4  \\\n",
       "0            train_0     0.0   8.9255 -6.7863  11.9081  5.0930  11.4607   \n",
       "1            train_1     0.0  11.5006 -4.1473  13.8588  5.3890  12.3622   \n",
       "2            train_2     0.0   8.6093 -2.7457  12.0805  7.8928  10.5825   \n",
       "3            train_3     0.0  11.0604 -2.1518   8.9522  7.1957  12.5846   \n",
       "4            train_4     0.0   9.8369 -1.4834  12.8746  6.6375  12.2772   \n",
       "...              ...     ...      ...     ...      ...     ...      ...   \n",
       "199995  train_199995     0.0  11.4880 -0.4956   8.2622  3.5142  10.3404   \n",
       "199996  train_199996     0.0   4.9149 -2.4484  16.7052  6.6345   8.3096   \n",
       "199997  train_199997     0.0  11.2232 -5.0518  10.5127  5.6456   9.3410   \n",
       "199998  train_199998     0.0   9.7148 -8.6098  13.6104  5.7930  12.5173   \n",
       "199999  train_199999     0.0  10.8762 -5.7105  12.1183  8.0328  11.5577   \n",
       "\n",
       "          var_5   var_6    var_7  ...  var_190_unique  var_191_unique  \\\n",
       "0       -9.2834  5.1187  18.6266  ...               0               0   \n",
       "1        7.0433  5.6208  16.5338  ...               0               0   \n",
       "2       -9.0837  6.9427  14.6155  ...               0               0   \n",
       "3       -1.8361  5.8428  14.9250  ...               0               0   \n",
       "4        2.4486  5.9405  19.2514  ...               0               0   \n",
       "...         ...     ...      ...  ...             ...             ...   \n",
       "199995  11.6081  5.6709  15.1516  ...               0               1   \n",
       "199996 -10.5628  5.8802  21.5940  ...               0               0   \n",
       "199997  -5.4086  4.5555  21.5571  ...               0               0   \n",
       "199998   0.5339  6.0479  17.0152  ...               0               0   \n",
       "199999   0.3488  5.2839  15.2058  ...               0               0   \n",
       "\n",
       "        var_192_unique  var_193_unique  var_194_unique  var_195_unique  \\\n",
       "0                    0               0               0               0   \n",
       "1                    0               0               0               0   \n",
       "2                    0               0               0               0   \n",
       "3                    0               0               0               0   \n",
       "4                    1               1               1               0   \n",
       "...                ...             ...             ...             ...   \n",
       "199995               0               0               0               0   \n",
       "199996               0               1               0               0   \n",
       "199997               0               0               0               0   \n",
       "199998               0               0               0               0   \n",
       "199999               0               0               0               0   \n",
       "\n",
       "        var_196_unique  var_197_unique  var_198_unique  var_199_unique  \n",
       "0                    0               0               0               0  \n",
       "1                    0               0               0               0  \n",
       "2                    0               0               0               0  \n",
       "3                    0               0               0               0  \n",
       "4                    0               0               0               0  \n",
       "...                ...             ...             ...             ...  \n",
       "199995               1               0               0               0  \n",
       "199996               0               0               0               0  \n",
       "199997               0               0               0               1  \n",
       "199998               0               0               0               1  \n",
       "199999               0               0               0               1  \n",
       "\n",
       "[200000 rows x 402 columns]"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "human-japanese",
   "metadata": {},
   "outputs": [],
   "source": [
    "train.to_csv(\"new_shiny_train.csv\", index=False)\n",
    "test.to_csv(\"new_shiny_test.csv\", index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "outer-walter",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>ID_code</th>\n",
       "      <th>target</th>\n",
       "      <th>var_0</th>\n",
       "      <th>var_1</th>\n",
       "      <th>var_2</th>\n",
       "      <th>var_3</th>\n",
       "      <th>var_4</th>\n",
       "      <th>var_5</th>\n",
       "      <th>var_6</th>\n",
       "      <th>var_7</th>\n",
       "      <th>...</th>\n",
       "      <th>var_190_unique</th>\n",
       "      <th>var_191_unique</th>\n",
       "      <th>var_192_unique</th>\n",
       "      <th>var_193_unique</th>\n",
       "      <th>var_194_unique</th>\n",
       "      <th>var_195_unique</th>\n",
       "      <th>var_196_unique</th>\n",
       "      <th>var_197_unique</th>\n",
       "      <th>var_198_unique</th>\n",
       "      <th>var_199_unique</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>train_0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>8.9255</td>\n",
       "      <td>-6.7863</td>\n",
       "      <td>11.9081</td>\n",
       "      <td>5.0930</td>\n",
       "      <td>11.4607</td>\n",
       "      <td>-9.2834</td>\n",
       "      <td>5.1187</td>\n",
       "      <td>18.6266</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>train_1</td>\n",
       "      <td>0.0</td>\n",
       "      <td>11.5006</td>\n",
       "      <td>-4.1473</td>\n",
       "      <td>13.8588</td>\n",
       "      <td>5.3890</td>\n",
       "      <td>12.3622</td>\n",
       "      <td>7.0433</td>\n",
       "      <td>5.6208</td>\n",
       "      <td>16.5338</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>train_2</td>\n",
       "      <td>0.0</td>\n",
       "      <td>8.6093</td>\n",
       "      <td>-2.7457</td>\n",
       "      <td>12.0805</td>\n",
       "      <td>7.8928</td>\n",
       "      <td>10.5825</td>\n",
       "      <td>-9.0837</td>\n",
       "      <td>6.9427</td>\n",
       "      <td>14.6155</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>train_3</td>\n",
       "      <td>0.0</td>\n",
       "      <td>11.0604</td>\n",
       "      <td>-2.1518</td>\n",
       "      <td>8.9522</td>\n",
       "      <td>7.1957</td>\n",
       "      <td>12.5846</td>\n",
       "      <td>-1.8361</td>\n",
       "      <td>5.8428</td>\n",
       "      <td>14.9250</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>train_4</td>\n",
       "      <td>0.0</td>\n",
       "      <td>9.8369</td>\n",
       "      <td>-1.4834</td>\n",
       "      <td>12.8746</td>\n",
       "      <td>6.6375</td>\n",
       "      <td>12.2772</td>\n",
       "      <td>2.4486</td>\n",
       "      <td>5.9405</td>\n",
       "      <td>19.2514</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>199995</th>\n",
       "      <td>train_199995</td>\n",
       "      <td>0.0</td>\n",
       "      <td>11.4880</td>\n",
       "      <td>-0.4956</td>\n",
       "      <td>8.2622</td>\n",
       "      <td>3.5142</td>\n",
       "      <td>10.3404</td>\n",
       "      <td>11.6081</td>\n",
       "      <td>5.6709</td>\n",
       "      <td>15.1516</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>199996</th>\n",
       "      <td>train_199996</td>\n",
       "      <td>0.0</td>\n",
       "      <td>4.9149</td>\n",
       "      <td>-2.4484</td>\n",
       "      <td>16.7052</td>\n",
       "      <td>6.6345</td>\n",
       "      <td>8.3096</td>\n",
       "      <td>-10.5628</td>\n",
       "      <td>5.8802</td>\n",
       "      <td>21.5940</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>199997</th>\n",
       "      <td>train_199997</td>\n",
       "      <td>0.0</td>\n",
       "      <td>11.2232</td>\n",
       "      <td>-5.0518</td>\n",
       "      <td>10.5127</td>\n",
       "      <td>5.6456</td>\n",
       "      <td>9.3410</td>\n",
       "      <td>-5.4086</td>\n",
       "      <td>4.5555</td>\n",
       "      <td>21.5571</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>199998</th>\n",
       "      <td>train_199998</td>\n",
       "      <td>0.0</td>\n",
       "      <td>9.7148</td>\n",
       "      <td>-8.6098</td>\n",
       "      <td>13.6104</td>\n",
       "      <td>5.7930</td>\n",
       "      <td>12.5173</td>\n",
       "      <td>0.5339</td>\n",
       "      <td>6.0479</td>\n",
       "      <td>17.0152</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>199999</th>\n",
       "      <td>train_199999</td>\n",
       "      <td>0.0</td>\n",
       "      <td>10.8762</td>\n",
       "      <td>-5.7105</td>\n",
       "      <td>12.1183</td>\n",
       "      <td>8.0328</td>\n",
       "      <td>11.5577</td>\n",
       "      <td>0.3488</td>\n",
       "      <td>5.2839</td>\n",
       "      <td>15.2058</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>200000 rows × 402 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "             ID_code  target    var_0   var_1    var_2   var_3    var_4  \\\n",
       "0            train_0     0.0   8.9255 -6.7863  11.9081  5.0930  11.4607   \n",
       "1            train_1     0.0  11.5006 -4.1473  13.8588  5.3890  12.3622   \n",
       "2            train_2     0.0   8.6093 -2.7457  12.0805  7.8928  10.5825   \n",
       "3            train_3     0.0  11.0604 -2.1518   8.9522  7.1957  12.5846   \n",
       "4            train_4     0.0   9.8369 -1.4834  12.8746  6.6375  12.2772   \n",
       "...              ...     ...      ...     ...      ...     ...      ...   \n",
       "199995  train_199995     0.0  11.4880 -0.4956   8.2622  3.5142  10.3404   \n",
       "199996  train_199996     0.0   4.9149 -2.4484  16.7052  6.6345   8.3096   \n",
       "199997  train_199997     0.0  11.2232 -5.0518  10.5127  5.6456   9.3410   \n",
       "199998  train_199998     0.0   9.7148 -8.6098  13.6104  5.7930  12.5173   \n",
       "199999  train_199999     0.0  10.8762 -5.7105  12.1183  8.0328  11.5577   \n",
       "\n",
       "          var_5   var_6    var_7  ...  var_190_unique  var_191_unique  \\\n",
       "0       -9.2834  5.1187  18.6266  ...               0               0   \n",
       "1        7.0433  5.6208  16.5338  ...               0               0   \n",
       "2       -9.0837  6.9427  14.6155  ...               0               0   \n",
       "3       -1.8361  5.8428  14.9250  ...               0               0   \n",
       "4        2.4486  5.9405  19.2514  ...               0               0   \n",
       "...         ...     ...      ...  ...             ...             ...   \n",
       "199995  11.6081  5.6709  15.1516  ...               0               1   \n",
       "199996 -10.5628  5.8802  21.5940  ...               0               0   \n",
       "199997  -5.4086  4.5555  21.5571  ...               0               0   \n",
       "199998   0.5339  6.0479  17.0152  ...               0               0   \n",
       "199999   0.3488  5.2839  15.2058  ...               0               0   \n",
       "\n",
       "        var_192_unique  var_193_unique  var_194_unique  var_195_unique  \\\n",
       "0                    0               0               0               0   \n",
       "1                    0               0               0               0   \n",
       "2                    0               0               0               0   \n",
       "3                    0               0               0               0   \n",
       "4                    1               1               1               0   \n",
       "...                ...             ...             ...             ...   \n",
       "199995               0               0               0               0   \n",
       "199996               0               1               0               0   \n",
       "199997               0               0               0               0   \n",
       "199998               0               0               0               0   \n",
       "199999               0               0               0               0   \n",
       "\n",
       "        var_196_unique  var_197_unique  var_198_unique  var_199_unique  \n",
       "0                    0               0               0               0  \n",
       "1                    0               0               0               0  \n",
       "2                    0               0               0               0  \n",
       "3                    0               0               0               0  \n",
       "4                    0               0               0               0  \n",
       "...                ...             ...             ...             ...  \n",
       "199995               1               0               0               0  \n",
       "199996               0               0               0               0  \n",
       "199997               0               0               0               1  \n",
       "199998               0               0               0               1  \n",
       "199999               0               0               0               1  \n",
       "\n",
       "[200000 rows x 402 columns]"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "therapeutic-scratch",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>ID_code</th>\n",
       "      <th>var_0</th>\n",
       "      <th>var_1</th>\n",
       "      <th>var_2</th>\n",
       "      <th>var_3</th>\n",
       "      <th>var_4</th>\n",
       "      <th>var_5</th>\n",
       "      <th>var_6</th>\n",
       "      <th>var_7</th>\n",
       "      <th>var_8</th>\n",
       "      <th>...</th>\n",
       "      <th>var_190_unique</th>\n",
       "      <th>var_191_unique</th>\n",
       "      <th>var_192_unique</th>\n",
       "      <th>var_193_unique</th>\n",
       "      <th>var_194_unique</th>\n",
       "      <th>var_195_unique</th>\n",
       "      <th>var_196_unique</th>\n",
       "      <th>var_197_unique</th>\n",
       "      <th>var_198_unique</th>\n",
       "      <th>var_199_unique</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>test_3</td>\n",
       "      <td>8.5374</td>\n",
       "      <td>-1.3222</td>\n",
       "      <td>12.0220</td>\n",
       "      <td>6.5749</td>\n",
       "      <td>8.8458</td>\n",
       "      <td>3.1744</td>\n",
       "      <td>4.9397</td>\n",
       "      <td>20.5660</td>\n",
       "      <td>3.3755</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>test_7</td>\n",
       "      <td>17.3035</td>\n",
       "      <td>-2.4212</td>\n",
       "      <td>13.3989</td>\n",
       "      <td>8.3998</td>\n",
       "      <td>11.0777</td>\n",
       "      <td>9.6449</td>\n",
       "      <td>5.9596</td>\n",
       "      <td>17.8477</td>\n",
       "      <td>-4.8068</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>test_11</td>\n",
       "      <td>10.6137</td>\n",
       "      <td>-2.1898</td>\n",
       "      <td>8.9090</td>\n",
       "      <td>3.8014</td>\n",
       "      <td>13.8602</td>\n",
       "      <td>-5.9802</td>\n",
       "      <td>5.5515</td>\n",
       "      <td>15.4716</td>\n",
       "      <td>-0.1714</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>test_15</td>\n",
       "      <td>14.8595</td>\n",
       "      <td>-4.5378</td>\n",
       "      <td>13.6483</td>\n",
       "      <td>5.6480</td>\n",
       "      <td>9.9144</td>\n",
       "      <td>1.5190</td>\n",
       "      <td>5.0358</td>\n",
       "      <td>13.4524</td>\n",
       "      <td>-2.5419</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>test_16</td>\n",
       "      <td>14.1732</td>\n",
       "      <td>-5.1490</td>\n",
       "      <td>9.7591</td>\n",
       "      <td>3.7316</td>\n",
       "      <td>10.3700</td>\n",
       "      <td>-21.9202</td>\n",
       "      <td>7.7130</td>\n",
       "      <td>18.8749</td>\n",
       "      <td>0.4680</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 401 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "    ID_code    var_0   var_1    var_2   var_3    var_4    var_5   var_6  \\\n",
       "3    test_3   8.5374 -1.3222  12.0220  6.5749   8.8458   3.1744  4.9397   \n",
       "7    test_7  17.3035 -2.4212  13.3989  8.3998  11.0777   9.6449  5.9596   \n",
       "11  test_11  10.6137 -2.1898   8.9090  3.8014  13.8602  -5.9802  5.5515   \n",
       "15  test_15  14.8595 -4.5378  13.6483  5.6480   9.9144   1.5190  5.0358   \n",
       "16  test_16  14.1732 -5.1490   9.7591  3.7316  10.3700 -21.9202  7.7130   \n",
       "\n",
       "      var_7   var_8  ...  var_190_unique  var_191_unique  var_192_unique  \\\n",
       "3   20.5660  3.3755  ...               0               0               0   \n",
       "7   17.8477 -4.8068  ...               0               0               0   \n",
       "11  15.4716 -0.1714  ...               0               0               0   \n",
       "15  13.4524 -2.5419  ...               0               0               1   \n",
       "16  18.8749  0.4680  ...               0               0               0   \n",
       "\n",
       "    var_193_unique  var_194_unique  var_195_unique  var_196_unique  \\\n",
       "3                0               0               0               0   \n",
       "7                0               0               0               0   \n",
       "11               0               0               0               1   \n",
       "15               0               0               0               0   \n",
       "16               0               0               0               0   \n",
       "\n",
       "    var_197_unique  var_198_unique  var_199_unique  \n",
       "3                0               0               1  \n",
       "7                0               1               0  \n",
       "11               0               0               1  \n",
       "15               0               0               1  \n",
       "16               0               0               0  \n",
       "\n",
       "[5 rows x 401 columns]"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "test.head(5)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
